/*******************************************************************************
Name: create_sector_indices
*******************************************************************************/


* ---------------------------------------------------------------------------
* Setup: empty memory, close any log left open by an earlier run, and open a
* fresh log for this do-file.
* ---------------------------------------------------------------------------
clear
capture log close
log using  "${logdir}3-create_sector_indices.log", replace


* ---------------------------------------------------------------------------
* For every census extract (1970, 1980, 1990, 2000): within each metarea,
* compute the perwt-weighted share of records falling in each industry group
* and in each occupation group, and save one shares file per year and sector
* definition.
* ---------------------------------------------------------------------------
foreach yr of numlist 1970 1980 1990 2000 {

	use "${rawdir}ipums_`yr'.dta", clear

	* Sample: records with a nonzero metarea code and empstat equal to 1
	keep if metarea != 0
	keep if empstat == 1


	* -------------------------------------------------------------------
	* BY INDUSTRY
	* -------------------------------------------------------------------

	* Map ind1990 codes into 15 groups; 0 means no group was assigned
	gen ind_cat = 0
	replace ind_cat = 1  if inrange(ind1990, 10, 32)
	replace ind_cat = 2  if inrange(ind1990, 40, 50)
	replace ind_cat = 3  if ind1990 == 60
	replace ind_cat = 4  if inrange(ind1990, 100, 222)
	replace ind_cat = 5  if inrange(ind1990, 230, 392)
	replace ind_cat = 6  if inrange(ind1990, 400, 472)
	replace ind_cat = 7  if inrange(ind1990, 500, 571)
	replace ind_cat = 8  if inrange(ind1990, 580, 691)
	replace ind_cat = 9  if inrange(ind1990, 700, 712)
	replace ind_cat = 10 if inrange(ind1990, 721, 760)
	replace ind_cat = 11 if inrange(ind1990, 761, 810)
	replace ind_cat = 12 if inrange(ind1990, 812, 840)
	replace ind_cat = 13 if inrange(ind1990, 842, 860)
	* Group 14 is the remainder of 812-893 not already claimed by 12 or 13,
	* so this line must stay after the two lines above
	replace ind_cat = 14 if inrange(ind1990, 812, 893) & ind_cat == 0
	replace ind_cat = 15 if inrange(ind1990, 900, 932)

	tabulate ind_cat

	* The industry work happens on a throwaway copy so that the occupation
	* section below starts again from the full employed sample
	preserve

	keep if ind_cat != 0

	* Weighted totals per metarea-by-industry cell and per metarea
	egen ind_total = total(perwt), by(metarea ind_cat)
	egen msa_total = total(perwt), by(metarea)

	* Unweighted record counts for the same two levels
	egen ind_count = count(year), by(metarea ind_cat)
	egen msa_count = count(year), by(metarea)

	gen ind_share = ind_total / msa_total

	* All three variables are constant within a cell, so the mean simply
	* reduces the data to one row per metarea, industry group and year
	collapse (mean) ind_share ind_count msa_count, by(metarea ind_cat year)

	save "${datadir}ind_shares`yr'.dta", replace

	restore


	* -------------------------------------------------------------------
	* BY OCCUPATION
	* -------------------------------------------------------------------

	* Map occ1990 codes into 10 groups; 0 means no group was assigned.
	* Every record in memory already has empstat equal to 1.
	gen occ_cat = 0
	replace occ_cat = 1  if inrange(occ1990, 3, 37)
	replace occ_cat = 2  if inrange(occ1990, 43, 200)
	replace occ_cat = 3  if inrange(occ1990, 203, 235)
	replace occ_cat = 4  if inrange(occ1990, 243, 283)
	replace occ_cat = 5  if inrange(occ1990, 303, 389)
	replace occ_cat = 6  if inrange(occ1990, 405, 469)
	replace occ_cat = 7  if inrange(occ1990, 473, 498)
	replace occ_cat = 8  if inrange(occ1990, 503, 699)
	replace occ_cat = 9  if inrange(occ1990, 703, 889)
	replace occ_cat = 10 if occ1990 == 905

	tabulate occ_cat, missing
	keep if occ_cat != 0

	* Weighted totals per metarea-by-occupation cell and per metarea
	egen occ_total = total(perwt), by(metarea occ_cat)
	egen msa_total = total(perwt), by(metarea)

	* Unweighted record counts for the same two levels
	egen occ_count = count(year), by(metarea occ_cat)
	egen msa_count = count(year), by(metarea)

	gen occ_share = occ_total / msa_total

	* One row per metarea, occupation group and year
	collapse (mean) occ_share occ_count msa_count, by(metarea occ_cat year)

	save "${datadir}occ_shares`yr'.dta", replace

}

/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/
*CREATE 1970 WEIGHTS
/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/

/*----------------------------------------------------------------------*/
*BY INDUSTRY
/*----------------------------------------------------------------------*/

* ---------------------------------------------------------------------------
* Build the 1970 base sample used for the weights: load the 1970 extract,
* apply the sample restrictions, and create the cohort, education and
* region/division variables.
* ---------------------------------------------------------------------------
use "${rawdir}ipums_1970.dta", clear

* Sample restrictions, applied one at a time
keep if metarea != 0
keep if inrange(age, 16, 64)
keep if inlist(race, 1, 2) & hispan == 0
keep if gq == 1
keep if empstat == 1

* Ten-year age cohorts: 1 = 54-64, 2 = 44-53, 3 = 34-43, 4 = 24-33.
* Ages below 24 get 0 and are removed.
gen cohort_cat = cond(age >= 54 & age <= 64, 1, ///
                 cond(age >= 44 & age <  54, 2, ///
                 cond(age >= 34 & age <  44, 3, ///
                 cond(age >= 24 & age <  34, 4, 0))))

drop if cohort_cat == 0

* Translate detailed education codes (educd) into a numeric scale.
* Codes 0, 1, 2, 11 and 12, and any code not listed below, stay at 0.
gen educ_mod = 0
replace educ_mod = 1    if educd == 14
replace educ_mod = 2    if inlist(educd, 10, 15)
replace educ_mod = 2.5  if educd == 13
replace educ_mod = 3    if educd == 16
replace educ_mod = 4    if educd == 17
replace educ_mod = 5    if educd == 22
replace educ_mod = 5.5  if educd == 21
replace educ_mod = 6    if educd == 23
replace educ_mod = 6.5  if educd == 20
replace educ_mod = 7    if educd == 25
replace educ_mod = 7.5  if educd == 24
replace educ_mod = 8    if educd == 26
replace educ_mod = 9    if educd == 30
replace educ_mod = 10   if educd == 40
replace educ_mod = 11   if educd == 50
replace educ_mod = 11.5 if educd == 61
replace educ_mod = 12   if inlist(educd, 60, 62)
replace educ_mod = 12.5 if educd == 65
replace educ_mod = 13   if inlist(educd, 70, 71)
replace educ_mod = 14   if inlist(educd, 80, 81, 82, 83)
replace educ_mod = 15   if educd == 90
replace educ_mod = 16   if inlist(educd, 100, 101)
replace educ_mod = 16   if inlist(educd, 110, 111, 112, 113, 114, 115, 116)

* Three education groups: 1 = below 12, 2 = 12 up to but excluding 16,
* 3 = exactly 16. Anything else would be left missing.
gen educ_cat = cond(educ_mod < 12, 1, ///
               cond(educ_mod < 16, 2, ///
               cond(educ_mod == 16, 3, .)))

* educ_mod never exceeds 16 here, so this removes nothing; kept as a guard
drop if educ_cat == .


* Census regions: the raw region variable is kept under the name division,
* and region becomes its leading digit
rename region division
gen region = floor(division/10)

* Hard-coded division and region assignments for four metarea codes
replace division = 21 if metarea == 164
replace division = 31 if inlist(metarea, 340, 916)
replace division = 32 if metarea == 452

replace region = 2 if metarea == 164
replace region = 3 if inlist(metarea, 340, 452, 916)

* Remove records whose derived region code is 9
keep if region != 9


* ---------------------------------------------------------------------------
* 1970 industry weights.
* For every demographic cell (metarea x race x cohort_cat x educ_cat x sex)
* compute the perwt-weighted fraction of the cell's records that fall in each
* of the 15 industry groups, and save the result in long form (one row per
* cell and industry group) as ind_weights1970.dta.
* The work is wrapped in preserve/restore so the 1970 sample in memory is
* still intact for the code that follows.
* ---------------------------------------------------------------------------
preserve

* Industry groups from ind1990 (same ranges as in the yearly shares loop);
* 0 means no group was assigned
gen ind_cat = 0 if empstat == 1
replace ind_cat = 1 if ind1990 >= 10 & ind1990 <= 32
replace ind_cat = 2 if ind1990 >= 40 & ind1990 <= 50
replace ind_cat = 3 if ind1990 >= 60 & ind1990 <= 60
replace ind_cat = 4 if ind1990 >= 100 & ind1990 <= 222
replace ind_cat = 5 if ind1990 >= 230 & ind1990 <= 392
replace ind_cat = 6 if ind1990 >= 400 & ind1990 <= 472
replace ind_cat = 7 if ind1990 >= 500 & ind1990 <= 571
replace ind_cat = 8 if ind1990 >= 580 & ind1990 <= 691
replace ind_cat = 9 if ind1990 >= 700 & ind1990 <= 712
replace ind_cat = 10 if ind1990 >= 721 & ind1990 <= 760
replace ind_cat = 11 if ind1990 >= 761 & ind1990 <= 810
replace ind_cat = 12 if ind1990 >= 812 & ind1990 <= 840
replace ind_cat = 13 if ind1990 >= 842 & ind1990 <= 860
* Group 14 takes what is left of 812-893 after groups 12 and 13
replace ind_cat = 14 if ind1990 >= 812 & ind1990 <= 893 & ind_cat == 0
replace ind_cat = 15 if ind1990 >= 900 & ind1990 <= 932

tab ind_cat, mi

drop if ind_cat == 0


* Unweighted record counts per demographic cell and per metarea
egen count_group = count(year), by(metarea race cohort_cat educ_cat sex)
egen count_metarea = count(year), by(metarea)

* Denominator: total perwt in the cell over records with an assigned group
egen total_weight = sum(perwt*(ind_cat != 0 & ind_cat !=.)), by(metarea race cohort_cat educ_cat sex)

* Numerator for each group in turn; the helper total is dropped right away
forvalues i = 1(1)15{
egen total`i'_weight = sum(perwt*(ind_cat == `i')), by(metarea race cohort_cat educ_cat sex)
gen frac_ind_`i' = total`i'_weight/total_weight

drop total`i'_weight
}


* Everything kept below is constant within a cell, so one row per cell suffices
bysort metarea race educ_cat cohort_cat sex: gen first = _n == 1

keep if first

keep metarea race educ_cat cohort_cat sex count_group count_metarea frac_ind_*

* Wide to long: frac_ind_1 ... frac_ind_15 become a single variable named
* frac_ind_ (trailing underscore included), indexed by ind_cat
reshape long frac_ind_, i(metarea race educ_cat cohort_cat sex) j(ind_cat)

* Use the full variable name. The previous "rename frac_ind ..." only worked
* through variable-name abbreviation and fails under "set varabbrev off".
rename frac_ind_ share_ind

save "${datadir}ind_weights1970.dta", replace

restore

/*----------------------------------------------------------------------*/
*BY OCCUPATION
/*----------------------------------------------------------------------*/


* ---------------------------------------------------------------------------
* 1970 occupation weights.
* For every demographic cell (metarea x race x cohort_cat x educ_cat x sex)
* compute the perwt-weighted fraction of the cell's records that fall in each
* of the 10 occupation groups, and save the result in long form (one row per
* cell and occupation group) as occ_weights1970.dta.
* This section is not wrapped in preserve/restore: the data in memory are
* replaced by the weights.
* ---------------------------------------------------------------------------

* Occupation groups from occ1990 (same ranges as in the yearly shares loop);
* 0 means no group was assigned
gen occ_cat = 0 if empstat == 1
replace occ_cat = 1 if occ1990 >= 3 & occ1990 <= 37
replace occ_cat = 2 if occ1990 >= 43 & occ1990 <= 200
replace occ_cat = 3 if occ1990 >= 203 & occ1990 <= 235
replace occ_cat = 4 if occ1990 >= 243 & occ1990 <= 283
replace occ_cat = 5 if occ1990 >= 303 & occ1990 <= 389
replace occ_cat = 6 if occ1990 >= 405 & occ1990 <= 469
replace occ_cat = 7 if occ1990 >= 473 & occ1990 <= 498
replace occ_cat = 8 if occ1990 >= 503 & occ1990 <= 699
replace occ_cat = 9 if occ1990 >= 703 & occ1990 <= 889
replace occ_cat = 10 if occ1990 == 905

tab occ_cat, mi
drop if occ_cat == 0


* Unweighted record counts per demographic cell and per metarea
egen count_group = count(year), by(metarea race cohort_cat educ_cat sex)
egen count_metarea = count(year), by(metarea)

* Denominator: total perwt in the cell over records with an assigned group
egen total_weight = sum(perwt*(occ_cat != 0 & occ_cat !=.)), by(metarea race cohort_cat educ_cat sex)

* Numerator for each group in turn; the helper total is dropped right away
forvalues i = 1(1)10{
egen total`i'_weight = sum(perwt*(occ_cat == `i')), by(metarea race cohort_cat educ_cat sex)
gen frac_occ_`i' = total`i'_weight/total_weight

drop total`i'_weight
}


* Everything kept below is constant within a cell, so one row per cell suffices
bysort metarea race educ_cat cohort_cat sex: gen first = _n == 1

keep if first

keep metarea race educ_cat cohort_cat sex count_group count_metarea frac_occ_*

* Wide to long: frac_occ_1 ... frac_occ_10 become a single variable named
* frac_occ_ (trailing underscore included), indexed by occ_cat
reshape long frac_occ_, i(metarea race educ_cat cohort_cat sex) j(occ_cat)

* Use the full variable name. The previous "rename frac_occ ..." only worked
* through variable-name abbreviation and fails under "set varabbrev off".
rename frac_occ_ share_occ

save "${datadir}occ_weights1970.dta", replace


/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/
*CREATE INDICES
/*----------------------------------------------------------------------*/
/*----------------------------------------------------------------------*/



/*----------------------------------------------------------------------*/
*CREATE INDUSTRY INDEX
/*----------------------------------------------------------------------*/


* ---------------------------------------------------------------------------
* Industry index.
* Stack the yearly metarea-by-industry shares, take one-, two- and
* three-period log changes of each share, attach the 1970 demographic-cell
* weights, and sum weight times log change over industries within each
* cell and year. Output: ind_index.dta.
* ---------------------------------------------------------------------------

* Stack the four yearly share files
use "${datadir}ind_shares1970.dta", clear
foreach yr of numlist 1980 1990 2000 {
	append using "${datadir}ind_shares`yr'.dta"
}

* Consecutive period counter (1980 = 2, 1990 = 3, 2000 = 4, anything else = 1)
* so that lag operators step one census at a time
gen order = 1 + (year == 1980) + 2*(year == 1990) + 3*(year == 2000)

* Single numeric panel identifier for each metarea-by-industry pair
gen metareaXind_cat = 100*metarea + ind_cat

tsset metareaXind_cat order

* Log share and its change over 1, 2 and 3 periods
gen ln_ind_share = ln(ind_share)
forvalues k = 1/3 {
	gen ln_ind_share_`k'd = ln_ind_share - L`k'.ln_ind_share
}

keep metarea ind_cat year ln_ind_share_1d ln_ind_share_2d ln_ind_share_3d

sort metarea ind_cat

* Pair every share-change row with every 1970 demographic cell of the same
* metarea and industry group; rows without a match on either side are dropped
joinby metarea ind_cat using "${datadir}ind_weights1970.dta"

* Weighted sum over industries within each demographic cell and year.
* NOTE(review): the egen sum treats a missing product as zero, so a cell whose
* lagged share is missing for some industry still receives a partial sum.
* Confirm this is intended.
forvalues k = 1/3 {
	egen ln_share_ind_`k'd = total(share_ind*ln_ind_share_`k'd), by(metarea race educ_cat cohort_cat sex year)
}

* Blank out the years for which a change of that length cannot exist
replace ln_share_ind_1d = . if year == 1970
replace ln_share_ind_2d = . if inlist(year, 1970, 1980)
replace ln_share_ind_3d = . if year != 2000

* The sums are constant within a cell and year; reduce to one row each
collapse (mean) ln_share_ind_1d ln_share_ind_2d ln_share_ind_3d, by(metarea race educ_cat cohort_cat sex year)

drop if race == .

save "${datadir}ind_index.dta", replace


/*----------------------------------------------------------------------*/
*CREATE OCCUPATION INDEX
/*----------------------------------------------------------------------*/


* ---------------------------------------------------------------------------
* Occupation index.
* Stack the yearly metarea-by-occupation shares, take one-, two- and
* three-period log changes of each share, attach the 1970 demographic-cell
* weights, and sum weight times log change over occupations within each
* cell and year. Output: occ_index.dta. The log is closed at the end.
* ---------------------------------------------------------------------------

* Stack the four yearly share files
use "${datadir}occ_shares1970.dta", clear
foreach yr of numlist 1980 1990 2000 {
	append using "${datadir}occ_shares`yr'.dta"
}

* Consecutive period counter (1980 = 2, 1990 = 3, 2000 = 4, anything else = 1)
* so that lag operators step one census at a time
gen order = 1 + (year == 1980) + 2*(year == 1990) + 3*(year == 2000)

* Single numeric panel identifier for each metarea-by-occupation pair
gen metareaXocc_cat = 100*metarea + occ_cat

tsset metareaXocc_cat order

* Log share and its change over 1, 2 and 3 periods
gen ln_occ_share = ln(occ_share)
forvalues k = 1/3 {
	gen ln_occ_share_`k'd = ln_occ_share - L`k'.ln_occ_share
}

keep metarea occ_cat year ln_occ_share_1d ln_occ_share_2d ln_occ_share_3d

sort metarea occ_cat

* Pair every share-change row with every 1970 demographic cell of the same
* metarea and occupation group; rows without a match on either side are dropped
joinby metarea occ_cat using "${datadir}occ_weights1970.dta"

* Weighted sum over occupations within each demographic cell and year.
* NOTE(review): the egen sum treats a missing product as zero, so a cell whose
* lagged share is missing for some occupation still receives a partial sum.
* Confirm this is intended.
forvalues k = 1/3 {
	egen ln_share_occ_`k'd = total(share_occ*ln_occ_share_`k'd), by(metarea race educ_cat cohort_cat sex year)
}

* Blank out the years for which a change of that length cannot exist
replace ln_share_occ_1d = . if year == 1970
replace ln_share_occ_2d = . if inlist(year, 1970, 1980)
replace ln_share_occ_3d = . if year != 2000

* The sums are constant within a cell and year; reduce to one row each
collapse (mean) ln_share_occ_1d ln_share_occ_2d ln_share_occ_3d, by(metarea race educ_cat cohort_cat sex year)

drop if race == .

save "${datadir}occ_index.dta", replace

log close



